library(ggplot2)

library(forcats)
library(dplyr)
library(tidyr)
library(lme4)
library(GGally)
library(lattice)
library(INLA)
library(gstat)
library(fields)
library(reshape)
library(rgdal)
library(rgeos)
library(gridExtra)
library(plotly)

#Loading dataset

#Topo lidar data#
topo_df <- read.csv("data/lidar_final_spatialjoin.csv") %>%
  select(-ID_sectie, -join_KRIBV, -join_KD, -join_KD_nr, -join_Opp_m, -distance) #delete redundant data
nrow(topo_df)
## [1] 7890
colnames(topo_df)
##  [1] "gridcell"   "marram_san" "mean_diff"  "sd_diff"    "n"         
##  [6] "mean_15"    "mean_19"    "X"          "Y"          "dist_HW"   
## [11] "join_ID"
#data for H (joint count, H, of Marram)
jc_df <- read.csv("data/BE_joincount_2.csv") %>% 
  filter(CLASS==6) %>%#filter the marram-values; marram is class 6
  select(STD_DEVIATE, gridcell)

#data for P (proportion of Marram)
p_df <- read.csv("data/BE_classtat_2.csv") %>%
  filter(class==6) %>%#filter the marram-values; marram is class 6
  select(prop.landscape, gridcell)
  
#joining the datasets
vegetation <- topo_df %>% left_join(jc_df, by="gridcell") %>%
  left_join(p_df, by="gridcell")

#filter only cells with marram_sand_prop > 0.75 and delete NA's
vegetation <- vegetation %>% filter(marram_san >= 0.75) %>%
  drop_na()

#write.csv(vegetation_sel, "filtered_lidar_final.csv") #to check spatially the filtered data in QGIS

#renaming the data
vegetation <- vegetation %>%
  rename(H=STD_DEVIATE, P=prop.landscape, ID_sectie=join_ID)

#standardizing covariates P, H #P is between 0 and 1 so ok
vegetation <- vegetation %>% mutate(Pstd=P) %>%
  mutate(Hstd=scale(H, center=mean(H), scale=sd(H))) %>%
  mutate(Diststd=scale(dist_HW, center=mean(dist_HW), scale=sd(dist_HW))) %>%
  #mutating X and Y into km
  mutate(Xkm = X/1000, Ykm=Y/1000)

head(vegetation)
##      gridcell marram_san  mean_diff   sd_diff   n  mean_15  mean_19     X
## 1 22270199210  0.9538462 0.13518560 0.2257488 361 8.526061 8.661247 22270
## 2 22290199170  0.9910714 0.19333333 0.2213239 228 8.560675 8.754009 22290
## 3 22290199210  0.9673721 0.38110803 0.2862125 361 7.362194 7.743302 22290
## 4 22290199230  1.0000000 0.21843767 0.3680388 361 6.815493 7.033931 22290
## 5 22310199170  0.9374486 0.32186981 0.2088671 361 8.841568 9.163438 22310
## 6 22310199190  0.8368000 0.08634167 0.1714334 360 9.330414 9.416756 22310
##        Y   dist_HW ID_sectie         H          P       Pstd       Hstd
## 1 199210  77.50226         1  8.086036 0.12307692 0.12307692 -3.8419514
## 2 199170 121.15867         1 11.560220 0.02678571 0.02678571 -3.5182369
## 3 199210  82.35297         2 55.103303 0.19841270 0.19841270  0.5389819
## 4 199230  62.95012         2 55.159987 0.02640000 0.02640000  0.5442635
## 5 199170 126.00939         2 57.770901 0.19300412 0.19300412  0.7875410
## 6 199190 106.66227         2 59.892914 0.34440000 0.34440000  0.9852641
##       Diststd   Xkm    Ykm
## 1 -0.79638864 22.27 199.21
## 2  0.09358064 22.29 199.17
## 3 -0.69750316 22.29 199.21
## 4 -1.09304506 22.29 199.23
## 5  0.19246612 22.31 199.17
## 6 -0.20193953 22.31 199.19
nrow(vegetation)
## [1] 2157
#data for sand suppletion#
#get suppletion data
suppletions <- read.csv("data/volumes_droogstrand.csv", sep=";") %>%
  #only get values from 2015-2019
  filter(survey.datum %in% c("17/05/2015", "10/04/2016", "26/05/2017", "17/04/2018", "20/04/2019")) %>% 
  gather(ID_sectie, dV, X1:X266) %>%
  group_by(ID_sectie) %>% summarise(meandV=mean(dV)) %>% #calculate average of dV
  separate(ID_sectie, into=c("X", "ID_sectie"), sep="X") %>%
  mutate(ID_sectie=as.numeric(ID_sectie)) %>%
  select(-X) %>% arrange(ID_sectie)
## `summarise()` ungrouping output (override with `.groups` argument)
head(suppletions)
## # A tibble: 6 x 2
##   ID_sectie meandV
##       <dbl>  <dbl>
## 1         1  -4.18
## 2         2  -1.22
## 3         3  -2.4 
## 4         4  -2   
## 5         5   1.84
## 6         6   0.4
vegetation <- vegetation %>% left_join(suppletions, by="ID_sectie") %>% #get suppletion value into dataset
  mutate(meandVstd = scale(meandV)) #standardize it

#Data exploration

#1. Outliers
#Cleveland dotcharts see Zuur et al 2010 doi: 10.1111/j.2041-210X.2009.00001.x
par(mfrow = c(1,4))
dotchart(vegetation$P, main="P")
dotchart(vegetation$H, main="H")
dotchart(vegetation$mean_diff, main="mean_diff") #maybe two outlier in mean_diff on the left
dotchart(vegetation$sd_diff, main="sd_diff")

dotchart(vegetation$meandV, main="dV")

#2. Multicollinearity between covariates
#correlation
par(mfrow=c(1,1))

plot(Hstd~Pstd, data=vegetation) #-->bit of a parabolic relation?

plot(H~P, data=vegetation)

# myVars <- vegetation[,c("Hstd", "Pstd")]
# ggpairs(myVars)

#3. Relationships Y~X
par(mfrow=c(2,2))
plot(mean_diff~Hstd, data=vegetation)#centered around zero
plot(mean_diff~Pstd, data=vegetation)#centered around zero
plot(sd_diff~Hstd, data=vegetation)#parabolic?
plot(sd_diff~Pstd, data=vegetation)#maybe linear, depends on noise

plot(mean_diff~meandVstd, data=vegetation)
plot(sd_diff~meandVstd, data=vegetation)
plot(mean_diff~H, data=vegetation)
plot(mean_diff~P, data=vegetation)

#4. spatial plot
xyplot(Ykm~Xkm, data=vegetation,
       aspect = "iso", col = 1, pch = 16)

#5. which kind of regression?
par(mfrow=c(1,1))
hist(vegetation$mean_diff, breaks=seq(-15,6,0.5) ) #Gaussian

hist(vegetation$sd_diff, breaks=seq(0,6,0.1) ) #gamma GLM with log-link, for >0 data

#Non-spatial model with INLA

#for mean_diff
f0_diff <- mean_diff ~ Pstd + Hstd + Pstd:Hstd +
  I(Pstd^2) + I(Hstd^2) +Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd
M0_diff <- inla(f0_diff, control.compute = list(dic = TRUE, waic = TRUE),
                family = "gaussian", data = vegetation)
summary(M0_diff)
## 
## Call:
## c("inla(formula = f0_diff, family = \"gaussian\", data = vegetation, ",  "    control.compute = list(dic = TRUE, waic = TRUE))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          1.7733          2.9435          0.9301          5.6470 
## 
## Fixed effects:
##                   mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept)     0.3407 0.0231     0.2954   0.3407     0.3860  0.3407   0
## Pstd            0.0895 0.1403    -0.1860   0.0895     0.3649  0.0895   0
## Hstd            0.0292 0.0241    -0.0181   0.0292     0.0765  0.0292   0
## I(Pstd^2)      -0.4275 0.1615    -0.7447  -0.4275    -0.1106 -0.4275   0
## I(Hstd^2)      -0.0037 0.0078    -0.0190  -0.0037     0.0115 -0.0037   0
## Diststd        -0.1490 0.0102    -0.1691  -0.1490    -0.1291 -0.1490   0
## meandVstd       0.0841 0.0097     0.0651   0.0841     0.1032  0.0841   0
## Pstd:Hstd      -0.2026 0.1341    -0.4659  -0.2026     0.0604 -0.2026   0
## Hstd:I(Pstd^2)  0.0711 0.1556    -0.2344   0.0711     0.3763  0.0711   0
## Pstd:I(Hstd^2) -0.0339 0.0206    -0.0744  -0.0339     0.0065 -0.0339   0
## 
## The model has no random effects
## 
## Model hyperparameters:
##                                          mean     sd 0.025quant 0.5quant
## Precision for the Gaussian observations 5.407 0.1627      5.093    5.406
##                                         0.975quant  mode
## Precision for the Gaussian observations      5.732 5.402
## 
## Expected number of effective parameters(std dev): 10.07(0.0022)
## Number of equivalent replicates : 214.17 
## 
## Deviance Information Criterion (DIC) ...............: 2495.01
## Deviance Information Criterion (DIC, saturated) ....: 2171.12
## Effective number of parameters .....................: 11.11
## 
## Watanabe-Akaike information criterion (WAIC) ...: 2501.18
## Effective number of parameters .................: 16.92
## 
## Marginal log-Likelihood:  -1315.55 
## Posterior marginals for linear predictor and fitted values computed
#for sd_diff
f0_err <- sd_diff ~ Pstd + Hstd + Pstd:Hstd +
  I(Pstd^2) + I(Hstd^2) +Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd
M0_err <- inla(f0_err, control.compute = list(dic = TRUE, waic = TRUE),
               #verbose=TRUE,
               family = "gamma", data = vegetation)
summary(M0_err)
## 
## Call:
## c("inla(formula = f0_err, family = \"gamma\", data = vegetation, control.compute = list(dic = TRUE, ",  "    waic = TRUE))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.8861          4.9629          0.5647          6.4138 
## 
## Fixed effects:
##                   mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept)    -0.9195 0.0333    -0.9845  -0.9196    -0.8539 -0.9199   0
## Pstd           -0.9225 0.2001    -1.3161  -0.9222    -0.5306 -0.9217   0
## Hstd            0.0057 0.0356    -0.0640   0.0056     0.0757  0.0055   0
## I(Pstd^2)      -0.3252 0.2293    -0.7740  -0.3257     0.1260 -0.3267   0
## I(Hstd^2)      -0.0283 0.0110    -0.0497  -0.0284    -0.0065 -0.0286   0
## Diststd        -0.1681 0.0145    -0.1966  -0.1681    -0.1395 -0.1681   0
## meandVstd      -0.0354 0.0139    -0.0628  -0.0354    -0.0081 -0.0353   0
## Pstd:Hstd       0.1851 0.2010    -0.2116   0.1858     0.5775  0.1873   0
## Hstd:I(Pstd^2) -0.2571 0.2300    -0.7059  -0.2581     0.1967 -0.2600   0
## Pstd:I(Hstd^2)  0.0155 0.0290    -0.0411   0.0154     0.0728  0.0151   0
## 
## The model has no random effects
## 
## Model hyperparameters:
##                                                mean     sd 0.025quant
## Precision parameter for the Gamma observations 2.60 0.0737      2.457
##                                                0.5quant 0.975quant  mode
## Precision parameter for the Gamma observations    2.599      2.747 2.597
## 
## Expected number of effective parameters(std dev): 10.03(0.001)
## Number of equivalent replicates : 214.97 
## 
## Deviance Information Criterion (DIC) ...............: -2195.14
## Deviance Information Criterion (DIC, saturated) ....: 2307.50
## Effective number of parameters .....................: 11.07
## 
## Watanabe-Akaike information criterion (WAIC) ...: -2190.88
## Effective number of parameters .................: 14.78
## 
## Marginal log-Likelihood:  1037.66 
## Posterior marginals for linear predictor and fitted values computed
##Checking assumptions
#residuals
Fit0_diff <- M0_diff$summary.fitted.values[1:nrow(vegetation),"mean"]
E0_diff   <- vegetation$mean_diff - Fit0_diff

Fit0_err <- M0_err$summary.fitted.values[1:nrow(vegetation),"mean"]
E0_err <- vegetation$sd_diff - Fit0_err

plot_assump <- function(Fit0, E0){
  par(mfrow=c(2,2))
  # Homogeneity
  plot(x = Fit0, y = E0)
  abline(h = 0, v = 0)
  # Normality
  hist(E0, breaks = 25)
  # Independence due to model misfit
  plot(x = vegetation$P, y = E0)
  abline(h = 0)
  plot(x=vegetation$Hstd, y = E0)
  abline(h = 0)
}

plot_assump(Fit0_diff, E0_diff)

plot_assump(Fit0_err, E0_err)

#Variogram to look at spatial autocorrelation
##Semivariogram of residuals
vegetation <- vegetation %>% mutate(
  mu0_diff = M0_diff$summary.fitted.values$mean,
  Var0_diff = (M0_diff$summary.fitted.values$sd)^2,
  Res_diff = mean_diff-mu0_diff, #Pearson's residuals, for Generalized LM, would be /sqrt(Var0)
  mu0_err = M0_err$summary.fitted.values$mean,
  Res_err = mean_diff-mu0_err)

V0_diff <- variogram(Res_diff ~ 1, locations = ~Xkm + Ykm,
                     data = as.data.frame(vegetation), cressie = TRUE,
                     cutoff=5)
V0_err <- variogram(Res_err ~ 1, locations = ~Xkm + Ykm,
                    data = as.data.frame(vegetation), cressie = TRUE,
                    cutoff=5)

plot_spatialres <- function(Res){
  ##Spatial pattern in residuals?
  #plotting on map
  MyCex <- 3*abs(Res)/max(Res) +0.5
  Sign <- as.numeric(Res>=0)+1
  MyPch <- c(1,16)[Sign]
  xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
         cex=MyCex, pch=MyPch, col=MyPch)
}

#spatial plot for mean_diff
plot(V0_diff)

plot_spatialres(Res=vegetation$Res_diff)

#spatial plot for sd_diff
plot(V0_err)

plot_spatialres(Res=vegetation$Res_err)

#fitted data vs real data
p_pred_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=mu0_diff))+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
  ggtitle("Prediciton ~ Real value for mean_diff")
p_pred_err <- ggplot(data=vegetation, aes(x=sd_diff, y=exp(mu0_err)) )+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color="red")+
  ggtitle("Prediciton ~ Real value for sd_diff")

par(mfrow=c(1,2))
p_pred_diff

p_pred_err

#Spatial model with INLA

#check distribution of distances between datapoints
Loc <- cbind(vegetation$Xkm, vegetation$Ykm)
D <- dist(Loc)
hist(D,freq = TRUE, main = "",
     xlab = "Distance between sites (km)",
     ylab = "Frequency")
abline(v = 50, lty = 2, col = 2)

plot(x = sort(D),
     y = (1:length(D))/length(D),
     type = "l",
     xlab = "Distance between sites (km)",
     ylab = "Cumulative proportion")
abline(h = 0.1, lty = 2, col = 2)

#make a mesh (to approximate spatial field on)
RangeGuess_diff <- 1
MaxEdge_diff <- RangeGuess_diff / 5
mesh_diff <- inla.mesh.2d(Loc,max.edge = c(3,5)*MaxEdge_diff, cutoff = MaxEdge_diff/5)#good way to start according to Zuur INLA book
plot(mesh_diff, asp=1)

mesh_diff$n
## [1] 2249
RangeGuess_err <- 1
MaxEdge_err <- RangeGuess_err/5
mesh_err <- inla.mesh.2d(Loc, max.edge = c(3,5)*MaxEdge_err, cutoff = MaxEdge_err/5)
mesh_err$n
## [1] 2249
#2. making a stack (to link mesh with data)
A_diff <- inla.spde.make.A(mesh_diff, loc = Loc)
A_err <- inla.spde.make.A(mesh_err, loc=Loc)
spde_diff <- inla.spde2.pcmatern(mesh_diff, prior.range = c(1, 0.5), prior.sigma = c(1.6, 0.05))#Differnce [-2.8,3.6] range/4
spde_err  <- inla.spde2.pcmatern(mesh_err,  prior.range = c(1, 0.5), prior.sigma = c(1.375, 0.05))#sd_diff [0.015, 5.5]

w1.index_diff <- inla.spde.make.index('w', n.spde = spde_diff$n.spde)
w1.index_err <- inla.spde.make.index('w', n.spde = spde_err$n.spde)

Xm <- model.matrix(~ Pstd * Hstd * Diststd*meandVstd, data = vegetation)
N <- nrow(vegetation)
X <- data.frame(Pstd = Xm[,2], Hstd = Xm[,3], Diststd= Xm[,4], meandVstd=Xm[,5])

Stack1_diff <- inla.stack(tag = "Fit", data = list(y = vegetation$mean_diff),  
                          A = list(1, 1, A_diff), effects = list(Intercept = rep(1, N),
                                                                 X = as.data.frame(X), w = w1.index_diff))
Stack1_err <- inla.stack(tag = "Fit", data = list(y = vegetation$sd_diff),  
                         A = list(1, 1, A_err), effects = list(Intercept = rep(1, N),
                                                               X = as.data.frame(X), w = w1.index_err))

#predictions have to be calculated during model fitting with inla (inlabru can do it afterwards)
#predictions are calculated for a 'average' spatial position
#so A doesn't have the spatial components of the spde
Stackpred_diff <- inla.stack(tag = "Covariates", data = list(y = NA),
                             A = list(1, 1), effects = list(Intercept = rep(1, N),
                                                            Xp = as.data.frame(X)))
Stackpred_err <- inla.stack(tag = "Covariates", data = list(y = NA),
                            A = list(1, 1), effects = list(Intercept = rep(1, N),
                                                           Xp = as.data.frame(X)))

#interpolations for prediction surface
interpol_df <- tibble(Pstd=seq(0,1, by=0.01), H=seq(20,70,by=0.5)) %>%
  tidyr::expand(Pstd, H) %>%
  mutate(Hstd=scale(H, center=mean(vegetation$H), scale=sd(vegetation$H))) %>%
  tibble::add_column(Diststd = mean(vegetation$Diststd)) %>%#give mean distHW
  tibble::add_column(meandVstd = mean(vegetation$meandVstd))#give mean suppletion meandV

Xm_inter <- model.matrix(~ Pstd * Hstd * Diststd * meandVstd, data = interpol_df)
N_inter <- nrow(interpol_df)
X_inter <- data.frame(Pstd = Xm_inter[,2], Hstd = Xm_inter[,3], Diststd= Xm_inter[,4], meandVstd=Xm_inter[,5])

####TO ADD#####  
Stackinter_diff  <- inla.stack(tag= "Interpolations", data = list(y = NA),
                               A = list(1, 1), effects = list(Intercept = rep(1, N_inter),
                                                              Xi = as.data.frame(X_inter)))
Stackinter_err <- inla.stack(tag= "Interpolations", data = list(y = NA),
                               A = list(1, 1), effects = list(Intercept = rep(1, N_inter),
                                                              Xi = as.data.frame(X_inter)))

Stack_diff <- inla.stack(Stack1_diff, Stackpred_diff, Stackinter_diff)
Stack_err  <- inla.stack(Stack1_err,  Stackpred_err, Stackinter_err)

#3. specify model formula
f_diff <- y ~ -1 + Intercept + Pstd + Hstd + Pstd:Hstd + I(Pstd^2) + I(Hstd^2) +
  Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd +
  f(w, model=spde_diff)

f_err <- y ~ -1 + Intercept + Pstd + Hstd + Pstd:Hstd + I(Pstd^2) + I(Hstd^2) +
  Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd +
  f(w, model=spde_err)

M1_diff <- inla(f_diff, family = "gaussian", data = inla.stack.data(Stack_diff),
                control.compute = list(dic = TRUE, config=TRUE), 
                #control.family = list(hyper = list(prec = list(param = c(1, 0.2)))),
                control.predictor = list(A = inla.stack.A(Stack_diff)))
M1_err <- inla(f_err, family = "gamma", data = inla.stack.data(Stack_err),
               control.compute = list(dic = TRUE, config=TRUE), #verbose=TRUE,
               control.predictor = list(A = inla.stack.A(Stack_err)))

summary(M1_diff)
## 
## Call:
## c("inla(formula = f_diff, family = \"gaussian\", data = inla.stack.data(Stack_diff), ",  "    control.compute = list(dic = TRUE, config = TRUE), control.predictor = list(A = inla.stack.A(Stack_diff)))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          2.0614        139.6663          3.2018        144.9295 
## 
## Fixed effects:
##                   mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## Intercept       0.2866 0.0936     0.1087   0.2835     0.4824  0.2782   0
## Pstd            0.7910 0.1195     0.5562   0.7910     1.0252  0.7912   0
## Hstd            0.0492 0.0190     0.0119   0.0492     0.0864  0.0492   0
## I(Pstd^2)      -0.9358 0.1328    -1.1964  -0.9359    -0.6751 -0.9360   0
## I(Hstd^2)       0.0078 0.0061    -0.0041   0.0078     0.0197  0.0078   0
## Diststd        -0.2258 0.0360    -0.2988  -0.2251    -0.1569 -0.2236   0
## meandVstd       0.0300 0.0435    -0.0571   0.0306     0.1136  0.0320   0
## Pstd:Hstd      -0.2214 0.1056    -0.4288  -0.2214    -0.0142 -0.2215   0
## Hstd:I(Pstd^2)  0.0505 0.1211    -0.1874   0.0505     0.2882  0.0506   0
## Pstd:I(Hstd^2) -0.0367 0.0165    -0.0690  -0.0367    -0.0044 -0.0367   0
## 
## Random effects:
## Name   Model
##  w   SPDE2 model 
## 
## Model hyperparameters:
##                                            mean     sd 0.025quant 0.5quant
## Precision for the Gaussian observations 12.6147 0.4571    11.7350  12.6080
## Range for w                              0.3157 0.0602     0.2176   0.3087
## Stdev for w                              0.6254 0.0830     0.4831   0.6180
##                                         0.975quant    mode
## Precision for the Gaussian observations    13.5348 12.5972
## Range for w                                 0.4532  0.2943
## Stdev for w                                 0.8086  0.6017
## 
## Expected number of effective parameters(std dev): 381.11(13.70)
## Number of equivalent replicates : 5.66 
## 
## Deviance Information Criterion (DIC) ...............: 1041.87
## Deviance Information Criterion (DIC, saturated) ....: 2545.17
## Effective number of parameters .....................: 383.42
## 
## Marginal log-Likelihood:  -842.31 
## Posterior marginals for linear predictor and fitted values computed
summary(M1_err)
## 
## Call:
## c("inla(formula = f_err, family = \"gamma\", data = inla.stack.data(Stack_err), ",  "    control.compute = list(dic = TRUE, config = TRUE), control.predictor = list(A = inla.stack.A(Stack_err)))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          1.2803        793.6152          2.7736        797.6690 
## 
## Fixed effects:
##                   mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## Intercept      -1.3858 0.0809    -1.5472  -1.3852    -1.2281 -1.3839   0
## Pstd            0.5340 0.1847     0.1709   0.5342     0.8961  0.5345   0
## Hstd            0.0298 0.0296    -0.0282   0.0298     0.0880  0.0298   0
## I(Pstd^2)      -1.2180 0.2055    -1.6209  -1.2182    -0.8145 -1.2186   0
## I(Hstd^2)      -0.0173 0.0093    -0.0354  -0.0174     0.0009 -0.0174   0
## Diststd        -0.3364 0.0440    -0.4250  -0.3357    -0.2519 -0.3342   0
## meandVstd       0.0381 0.0524    -0.0656   0.0382     0.1406  0.0386   0
## Pstd:Hstd       0.0236 0.1653    -0.3016   0.0238     0.3474  0.0242   0
## Hstd:I(Pstd^2) -0.1194 0.1901    -0.4920  -0.1197     0.2543 -0.1203   0
## Pstd:I(Hstd^2)  0.0135 0.0255    -0.0364   0.0135     0.0636  0.0134   0
## 
## Random effects:
## Name   Model
##  w   SPDE2 model 
## 
## Model hyperparameters:
##                                                  mean     sd 0.025quant
## Precision parameter for the Gamma observations 5.3746 0.1849     5.0183
## Range for w                                    0.1863 0.0230     0.1471
## Stdev for w                                    0.6939 0.0441     0.6127
##                                                0.5quant 0.975quant   mode
## Precision parameter for the Gamma observations   5.3721     5.7462 5.3680
## Range for w                                      0.1842     0.2372 0.1792
## Stdev for w                                      0.6919     0.7863 0.6870
## 
## Expected number of effective parameters(std dev): 387.90(13.50)
## Number of equivalent replicates : 5.561 
## 
## Deviance Information Criterion (DIC) ...............: -3526.99
## Deviance Information Criterion (DIC, saturated) ....: 2613.28
## Effective number of parameters .....................: 387.69
## 
## Marginal log-Likelihood:  1469.07 
## Posterior marginals for linear predictor and fitted values computed
#save the mesh and models to load later, as these models take long to run
#save(mesh_diff, file="output/mesh_diff.rda")
#save(mesh_err, file="output/mesh_err.rda")
#save(M1_diff, file="output/M1_diff.rda")
#save(M1_err, file="output/M1_err.rda")

###Plotting spatial autocorrelation of residuals
##Fitted values and residuals
#index of the fitted values
indexFit_diff <- inla.stack.index(Stack_diff, tag = "Fit")$data #fitted data (with SAC)
indexCov_diff <- inla.stack.index(Stack_diff, tag = "Covariates")$data #predictions
indexInt_diff <- inla.stack.index(Stack_diff, tag = "Interpolations")$data #interpolations

indexFit_err  <- inla.stack.index(Stack_err,  tag= "Fit")$data
indexCov_err  <- inla.stack.index(Stack_err,  tag="Covariates")$data
indexInt_err  <- inla.stack.index(Stack_err,  tag="Interpolations")$data 

#Difference
vegetation$Fit_diff_sp <- M1_diff$summary.fitted.values[indexFit_diff,"mean"]
vegetation$Pred_diff_sp <- M1_diff$summary.fitted.values[indexCov_diff, "mean"]
vegetation$Res_diff_sp   <- vegetation$mean_diff - vegetation$Fit_diff_sp
interpol_df$Int_diff_mean <- M1_diff$summary.fitted.values[indexInt_diff, "mean"]
interpol_df$Int_diff_sd <- M1_diff$summary.fitted.values[indexInt_diff, "sd"]
interpol_df$Int_diff_0.025quant <- M1_diff$summary.fitted.values[indexInt_diff, "0.025quant"]
interpol_df$Int_diff_0.975quant <- M1_diff$summary.fitted.values[indexInt_diff, "0.975quant"]

#Error
vegetation$Fit_err_sp <- M1_err$summary.fitted.values[indexFit_err, "mean"]
vegetation$Pred_err_sp <- M1_err$summary.fitted.values[indexCov_err, "mean"]
vegetation$Res_err_sp <- vegetation$sd_diff - vegetation$Fit_err_sp
interpol_df$Int_err_mean <- M1_err$summary.fitted.values[indexInt_err, "mean"]
interpol_df$Int_err_sd <- M1_diff$summary.fitted.values[indexInt_err, "sd"]
interpol_df$Int_err_0.025quant <- M1_err$summary.fitted.values[indexInt_err, "0.025quant"]
interpol_df$Int_err_0.975quant <- M1_err$summary.fitted.values[indexInt_err, "0.975quant"]

#calculate variogram
V0_diff <- variogram(Res_diff ~ 1, locations=~Xkm + Ykm,
                     data=as.data.frame(vegetation), cressie=T, cutoff=1)
V1_diff <- variogram(Res_diff_sp ~ 1, locations = ~Xkm + Ykm,
                     data = as.data.frame(vegetation), cressie = TRUE, cutoff=1)

V0_err <- variogram(Res_err ~ 1, locations=~Xkm + Ykm,
                    data=as.data.frame(vegetation), cressie=T, cutoff=1)
V1_err <- variogram(Res_err_sp ~ 1, locations = ~Xkm + Ykm,
                    data = as.data.frame(vegetation), cressie = TRUE, cutoff=1)

#plotting variograms
p_variogram_diff <- ggplot() + 
  geom_point(data = V0_diff, aes(x = dist, y = gamma)) + 
  geom_line(data = V0_diff, aes(x = dist, y = gamma),col = "red")+
  geom_point(data = V1_diff,aes(x = dist, y = gamma)) + 
  geom_line(data = V1_diff,aes(x = dist, y = gamma),col = "blue")+
  #scale_x_continuous(limits=c(0,50))+
  ggtitle("Semi-variogram for mean_diff")+
  xlab("Distance (km)") +
  ylab("Cressie's Semi-variance") + 
  theme(text = element_text(size = 15)) + 
  theme_classic()
  #theme(legend.position="none")  
#ylim(0,1.5)
p_variogram_diff

p_variogram_err <- ggplot() +
  geom_point(data = V0_err, aes(x = dist, y = gamma)) +
  geom_line(data = V0_err, aes(x = dist, y = gamma),col = "red")+
  geom_point(data = V1_err,aes(x = dist, y = gamma)) +
  geom_line(data = V1_err,aes(x = dist, y = gamma),col = "blue")+
  #scale_x_continuous(limits=c(0,50))+
  ggtitle("Semi-variogram for sd_diff")+
  xlab("Distance") +
  ylab("Cressie's Semi-variance") +
  theme(text = element_text(size = 15)) +
  theme_classic()
  #theme(legend.position="none")
#ylim(0,1.5)
p_variogram_err

#plotting spatial autocorrelation on map
Fit_diff_sp <- vegetation$Fit_diff_sp
Res_diff_sp <- vegetation$Res_diff_sp

MyCex <- 3*abs(Res_diff_sp)/max(Res_diff_sp) +0.5
Sign <- as.numeric(Res_diff_sp>=0)+1
MyPch <- c(1,16)[Sign]
xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
       cex=MyCex, pch=MyPch, col=MyPch, main="Residuals Diff, color is sign, size~size")

Fit_err_sp <- vegetation$Fit_err_sp
Res_err_sp <- vegetation$Res_err_sp

MyCex <- 3*abs(Res_err_sp)/max(Res_err_sp) +0.5
Sign <- as.numeric(Res_err_sp>=0)+1
MyPch <- c(1,16)[Sign]
xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
       cex=MyCex, pch=MyPch, col=MyPch, main="Residuals sd_diff, color is sign, size~size")

###Checking assumptions for spatial model####
plot_assump <- function(Fit0, E0){
  par(mfrow=c(2,2))
  # Homogeneity
  plot(x = Fit0, y = E0)
  abline(h = 0, v = 0)
  # Normality
  hist(E0, breaks = 25)
  # Independence due to model misfit
  plot(x = vegetation$Pstd, y = E0)
  abline(h = 0)
  plot(x=vegetation$Hstd, y = E0)
  abline(h = 0)
}

plot_assump(vegetation$Fit_diff_sp, vegetation$Res_diff_sp)

plot_assump(vegetation$Fit_err_sp, vegetation$Res_err_sp)

#Fitted data (with correction for SAR added to the data)
p_fit_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=Fit_diff_sp))+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
  ggtitle("Fitted values ~ Real value for mean_diff")
p_fit_err <- ggplot(data= vegetation, aes(x=sd_diff, y=Fit_err_sp))+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
  ggtitle("Fitted values ~ Real value for sd_diff")

p_fit_diff

p_fit_err

#predicted data (SAR not added, but only used to estimate the fixed effects; these are the kind of predictions possible for extrapolation)
p_pred_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=Pred_diff_sp))+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
  ggtitle("Predicted values ~ Real value for mean_diff")
p_pred_err <- ggplot(data= vegetation, aes(x=sd_diff, y=exp(Pred_err_sp)))+
  geom_point()+
  geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
  ggtitle("Predicted values ~ Real value for sd_diff")

p_pred_diff

p_pred_err

#Regression coefficients

###mean_diff

post_beta0_diff <- M0_diff$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
post_beta1_diff <- M1_diff$summary.fixed[, c("mean", "0.025quant", "0.975quant")]

NumberOfBetas <- nrow(M1_diff$summary.fixed) 
Combined <- rbind(post_beta0_diff, post_beta1_diff)
Combined$WhichModel <- rep(c("Non-spatial", "Spatial"),each = NumberOfBetas)
Combined$WhichVariable <- rep(rownames(M1_diff$summary.fixed), 2)
colnames(Combined) <- c("Mean", "Lo", "Up", "WhichModel", "WhichVariable")
Combined
##                         Mean           Lo           Up  WhichModel
## (Intercept)      0.340747255  0.295415160  0.386041368 Non-spatial
## Pstd             0.089522695 -0.186049188  0.364863640 Non-spatial
## Hstd             0.029203198 -0.018124738  0.076491476 Non-spatial
## I(Pstd^2)       -0.427533574 -0.744693748 -0.110639131 Non-spatial
## I(Hstd^2)       -0.003722869 -0.019001064  0.011542524 Non-spatial
## Diststd         -0.149049481 -0.169063939 -0.129051792 Non-spatial
## meandVstd        0.084123643  0.065064650  0.103166666 Non-spatial
## Pstd:Hstd       -0.202613227 -0.465863525  0.060416493 Non-spatial
## Hstd:I(Pstd^2)   0.071082343 -0.234386954  0.376295665 Non-spatial
## Pstd:I(Hstd^2)  -0.033915739 -0.074401464  0.006536061 Non-spatial
## Intercept        0.286614928  0.108699935  0.482444004     Spatial
## Pstd1            0.790961761  0.556192338  1.025185413     Spatial
## Hstd1            0.049156647  0.011888611  0.086377262     Spatial
## I(Pstd^2)1      -0.935822224 -1.196438282 -0.675123385     Spatial
## I(Hstd^2)1       0.007784189 -0.004137936  0.019689792     Spatial
## Diststd1        -0.225802025 -0.298758515 -0.156891233     Spatial
## meandVstd1       0.029974004 -0.057117624  0.113551056     Spatial
## Pstd:Hstd1      -0.221429435 -0.428757480 -0.014198983     Spatial
## Hstd:I(Pstd^2)1  0.050524434 -0.187383620  0.288154076     Spatial
## Pstd:I(Hstd^2)1 -0.036712028 -0.069021629 -0.004430045     Spatial
##                  WhichVariable
## (Intercept)          Intercept
## Pstd                      Pstd
## Hstd                      Hstd
## I(Pstd^2)            I(Pstd^2)
## I(Hstd^2)            I(Hstd^2)
## Diststd                Diststd
## meandVstd            meandVstd
## Pstd:Hstd            Pstd:Hstd
## Hstd:I(Pstd^2)  Hstd:I(Pstd^2)
## Pstd:I(Hstd^2)  Pstd:I(Hstd^2)
## Intercept            Intercept
## Pstd1                     Pstd
## Hstd1                     Hstd
## I(Pstd^2)1           I(Pstd^2)
## I(Hstd^2)1           I(Hstd^2)
## Diststd1               Diststd
## meandVstd1           meandVstd
## Pstd:Hstd1           Pstd:Hstd
## Hstd:I(Pstd^2)1 Hstd:I(Pstd^2)
## Pstd:I(Hstd^2)1 Pstd:I(Hstd^2)
p_posteriors_diff <- ggplot() + 
  geom_point(data = Combined, aes(x = WhichModel, y = Mean))+ 
  geom_errorbar(data = Combined, aes(x = WhichModel, ymax = Up, ymin = Lo, color=WhichModel), width=0.2)+
  xlab("Parameters") + ylab("Values")+
  geom_hline(yintercept=0, linetype="dashed", color="red")+
  theme(text = element_text(size = 15)) +
  facet_wrap( ~ WhichVariable, scales = "free_y")+
  theme(legend.position="none") +
  ggtitle("Posteriors (effect sizes) for mean_diff")
p_posteriors_diff

p_diff <- ggplot(filter(Combined, WhichModel=="Spatial") %>% mutate(WhichVariable = fct_reorder(WhichVariable, Mean))) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = 2) +
  aes(x = WhichVariable, y =Mean, ymin = Lo, ymax = Up) +
  xlab("covariate")+ ylab("Effect size")+
  ggtitle("Posteriors (effect sizes) for mean_diff")+
# 
#   scale_color_manual(values = c("SLA1" = "#66CD00",
#                                 "SLA2" = "#cfefb0")) +
  theme_classic() +
  coord_flip()
p_diff

###sd_diff
post_beta0_err <- M0_err$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
post_beta1_err <- M1_err$summary.fixed[, c("mean", "0.025quant", "0.975quant")]

NumberOfBetas <- nrow(M1_err$summary.fixed)
Combined <- rbind(post_beta0_err, post_beta1_err)
Combined$WhichModel <- rep(c("Non-spatial", "Spatial"),each = NumberOfBetas)
Combined$WhichVariable <- rep(rownames(M1_err$summary.fixed), 2)
colnames(Combined) <- c("Mean", "Lo", "Up", "WhichModel", "WhichVariable")
# Combined

p_posteriors_err <- ggplot() +
  geom_point(data = Combined, aes(x = WhichModel, y = Mean))+
  geom_errorbar(data = Combined, aes(x = WhichModel, ymax = Up, ymin = Lo, color=WhichModel), width=0.2)+
  xlab("Parameters") + ylab("Values")+
  geom_hline(yintercept=0, linetype="dashed", color="red")+
  theme(text = element_text(size = 15)) +
  facet_wrap( ~ WhichVariable, scales = "free_y")+
  theme(legend.position="none")+
  ggtitle("Posteriors (effect sizes) for sd_diff")
p_posteriors_err

p_err <- ggplot(filter(Combined, WhichModel=="Spatial") %>% mutate(WhichVariable = fct_reorder(WhichVariable, Mean))) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = 2) +
  aes(x = WhichVariable, y =Mean, ymin = Lo, ymax = Up) +
  xlab("covariate")+ ylab("Effect size")+
  ggtitle("Posteriors (effect sizes) for sd_diff")+
# 
#   scale_color_manual(values = c("SLA1" = "#66CD00",
#                                 "SLA2" = "#cfefb0")) +
  theme_classic() +
  coord_flip()
p_err

#Plots of spatial field

###Spatial field plots####
#contour
DSN <- "data/Study_Area_westkust.shp"
boundary <- readOGR(dsn = DSN, layer = "Study_Area_westkust")
## OGR data source with driver: ESRI Shapefile 
## Source: "D:\fbatslee\OneDrive - UGent\Endure\DuneTopoMarram\data\Study_Area_westkust.shp", layer: "Study_Area_westkust"
## with 1 features
## It has 1 fields
## Integer64 fields read as strings:  id
x1 <- c(22100, 22100, 39900, 39900) 
y1 <- c(198700, 209000, 209000, 198700)
# Make a polygon of it.
AreaPoly <- Polygon(cbind(x1, y1), hole = FALSE)
AreaSP   <- SpatialPolygons(list(Polygons(list(AreaPoly), ID = '1')))
AreaSP@proj4string  <- boundary@proj4string
#define what outside the raster
Outraster <- gDifference(AreaSP, boundary)
#plot(Outraster)

##Spatial field for mean_diff##
w.pm <- M1_diff$summary.random$w$mean
w.proj <- inla.mesh.projector(mesh_diff)
w.pmf <- inla.mesh.project(w.proj, w.pm)


xygrid <- expand.grid(w.proj$x, w.proj$y)
Data3D <- data.frame(x = xygrid[,1],
                     y = xygrid[,2],
                     z = melt(w.pmf)[,3])
names(Data3D) <- c("x", "y", "z")
#Data3D <- Data3D %>% filter((x>=22.100 & x<=81.200) & (y>=197.600 & y<=229.800))

p_sf_diff <- ggplot(Data3D, 
                    aes(x, y, z = z),
                    col = rgb(1, 0.5, 0.5, 0.7)) +
  stat_contour(geom="polygon", aes(fill = ..level..)) +
  geom_raster(aes(fill = z)) +
  labs(fill="w.pm")+
  # xlim(23231,23332)+
  # ylim(197851,197902)+
  geom_polygon(data=fortify(Outraster), aes(x=long/1000, y=lat/1000),
               #fill="white",
               alpha=0.5,
               color="white", inherit.aes=FALSE)+
  #geom_polygon(data=fortify(Outraster), aes(x=lat, y=long, z=NULL), color="black", fill=NA)+
  #stat_contour(geom="polygon", aes(fill = ..level..)) +
  #stat_contour(aes(colour = ..level..))+
  xlab("x (Km)")+
  ylab("y (Km)")+
  xlim(c(22.100, 39.900))+
  ylim(c(198.700, 209.000))+
  scale_fill_distiller(palette="PuOr")+
  theme_classic()+
  coord_fixed(ratio = 1)
p_sf_diff
## Warning: Removed 6488 rows containing non-finite values (stat_contour).
## Warning: Removed 6315 rows containing missing values (geom_raster).

##Spatial field for sd_diff#
w.pm <- M1_err$summary.random$w$mean
w.proj <- inla.mesh.projector(mesh_err)
w.pmf <- inla.mesh.project(w.proj, w.pm)


xygrid <- expand.grid(w.proj$x, w.proj$y)
Data3D <- data.frame(x = xygrid[,1],
                     y = xygrid[,2],
                     z = melt(w.pmf)[,3])
names(Data3D) <- c("x", "y", "z")

p_sf_err <- ggplot(Data3D,
                   aes(x, y, z = z),
                   col = rgb(1, 0.5, 0.5, 0.7)) +
  stat_contour(geom="polygon", aes(fill = ..level..)) +
  geom_raster(aes(fill = z)) +
  labs(fill="w.pm")+
  # xlim(23231,23332)+
  # ylim(197851,197902)+
  geom_polygon(data=fortify(Outraster), aes(x=long/1000, y=lat/1000),
               #fill="white",
               alpha=0.5,
               color="white", inherit.aes=FALSE)+
  #geom_polygon(data=fortify(Outraster), aes(x=lat, y=long, z=NULL), color="black", fill=NA)+
  #stat_contour(geom="polygon", aes(fill = ..level..)) +
  #stat_contour(aes(colour = ..level..))+
  xlab("x (m)")+
  ylab("y (m)")+
  xlim(c(22.100, 39.900))+
  ylim(c(198.700, 209.000))+
  scale_fill_distiller(palette="PuOr")+
  theme_classic()+
  coord_fixed(ratio = 1)
p_sf_err
## Warning: Removed 6488 rows containing non-finite values (stat_contour).

## Warning: Removed 6315 rows containing missing values (geom_raster).

#Predictions ~ raw data of H and P

#####Predicties ~ covariates
p_pred_diff_P <- ggplot(data= vegetation, aes(x=P, y=Pred_diff_sp))+
  geom_point()+
  ggtitle("Predicition Diff ~ P")
p_pred_diff_H <- ggplot(data= vegetation, aes(x=H, y=Pred_diff_sp))+
  geom_point()+
  ggtitle("Predicitions Diff ~ H")
p_pred_err_P <- ggplot(data= vegetation, aes(x=P, y=exp(Pred_err_sp)))+
  geom_point()+
  ggtitle("Predicitions Error ~ P")
p_pred_err_H <- ggplot(data= vegetation, aes(x=H, y=exp(Pred_err_sp)))+
  geom_point()+
  ggtitle("Predicitions Error ~ H")

grid.arrange(p_pred_diff_P, p_pred_diff_H, p_pred_err_P,p_pred_err_H, nrow=2, ncol=2)

##3D plots
plot_3d_diff <- plot_ly(vegetation, 
             x = ~P, y=~H, z =~Pred_diff_sp,
             color=~Fit_diff_sp) %>%
  add_markers() %>%
  layout(title = "Predicted values Difference",
         scene = list(xaxis=list(title="P"),
                      yaxis=list(title="H"),
                      zaxis=list(title="Pred Difference")))
plot_3d_diff
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
plot_3d_err <- plot_ly(vegetation, 
             x = ~P, y=~H, z =~Pred_err_sp,
             color=~Fit_err_sp) %>%
  add_markers() %>%
  layout(title = "Predicted values Error",
         scene = list(xaxis=list(title="P"),
                      yaxis=list(title="H"),
                      zaxis=list(title="Pred Error")))
plot_3d_err

#Plots for interpolations

p_int_diff_P <- ggplot(data= filter(interpol_df, H==32.5 |H==57.5),
                       aes(x=Pstd, y=Int_diff_mean, col=as.factor(H)))+
  geom_point()+
  ggtitle("Interpolations Diff ~ P")
p_int_diff_H <- ggplot(data= filter(interpol_df, Pstd==0.25 |Pstd==0.75),
                       aes(x=H, y=Int_diff_mean, col=as.factor(Pstd)))+
  geom_point()+
  ggtitle("Interpolations Diff ~ H")
p_int_err_P <- ggplot(data= filter(interpol_df, H==32.5 |H==57.5),
                      aes(x=Pstd, y=exp(Int_err_mean), col=as.factor(H) ))+
  geom_point()+
  ggtitle("Interpolations Error ~ P")
p_int_err_H <- ggplot(data= filter(interpol_df, Pstd==0.25 |Pstd==0.75),
                      aes(x=H, y=exp(Int_err_mean), col=as.factor(Pstd)))+
  geom_point()+
  ggtitle("Interpolations Error ~ H")

grid.arrange(p_int_diff_P, p_int_diff_H, p_int_err_P,p_int_err_H, nrow=2, ncol=2)

#3Dplot
Interpol_diff_3D <- plot_ly(data = interpol_df, x = ~H, y = ~Pstd, z=~Int_diff_mean, color=~Int_diff_mean)  %>% add_markers() %>%
  layout(title = "Change (Difference) in dune height",
  scene = list(
    xaxis = list(title = "JC"),
    yaxis = list(title = "P"),
    zaxis = list(title = "Height change (m)")))
Interpol_diff_3D
Interpol_err_3D <- plot_ly(data = interpol_df, x = ~H, y = ~Pstd, z=~exp(Int_err_mean), color=~exp(Int_err_mean))  %>% add_markers() %>%
  layout(title = "Variability in change (Error) in dune height",
  scene = list(
    xaxis = list(title = "JC"),
    yaxis = list(title = "P"),
    zaxis = list(title = "Var height change (m)")))
Interpol_err_3D
#write.csv(interpol_df, "output/Interpolations_INLA_fielddata.csv")
#write.csv(vegetation, "output/Data_and_predictions_INLA_fielddata.csv")